import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
from urllib.request import urlopen
import json
# with urlopen('https://raw.githubusercontent.com/Subhash9325/GeoJson-Data-of-Indian-States/master/Indian_States') as response:
# counties = json.load(response)
# Download the GeoJSON file (states_india.geojson) and parse it into a dict.
with urlopen('https://raw.githubusercontent.com/nikhilkumarsingh/choropleth-python-tutorial/master/states_india.geojson') as response:
    counties = json.load(response)

# Show the keys of the first feature.
# NOTE: the loaded GeoJSON must have a top-level 'id' key on every feature so
# that rows can be matched to shapes; the printed keys show it is not present
# in this file yet.
print(counties["features"][0].keys())
dict_keys(['type', 'geometry', 'properties'])
# Give every feature a top-level 'id' key holding its state name, taken from
# properties['st_nm'].
# (For a GeoJSON file that stores the name under properties['NAME_1'], read
# that key instead.)
for feature in counties['features']:
    feature['id'] = feature['properties']['st_nm']

# Confirm that 'id' now appears among the feature keys.
print(counties["features"][0].keys())
dict_keys(['type', 'geometry', 'properties', 'id'])
# Collect the 'id' of every GeoJSON feature, in file order.
names = [feature['id'] for feature in counties['features']]

# Load the NCRB table and keep only the rows whose 'Category' is a state or a
# union territory; rows with any other category are dropped.
df = pd.read_csv("data/NCRB_ADSI-2020_Table_1A.9.csv")
df = df[df['Category'].isin(['State', 'Union Territories'])]
Note: The GeoJSON data has no separate entries for Ladakh and Jammu & Kashmir, whereas the CSV file has a separate row for each. The rows for Ladakh and Jammu & Kashmir are therefore merged into a single row in the dataframe. Conversely, the GeoJSON file contains two separate places, 'Daman & Diu' and 'Dadara & Nagar Havelli', but the dataframe has only one row, named 'D & N Haveli and Daman & Diu'. A copy of that row is therefore made, so that the same values are used for both places.
def _row_label(frame, place):
    """Return the index label of the first row whose 'State/UT/City' is *place*."""
    return frame.index[frame['State/UT/City'] == place].tolist()[0]


# --- Merge the 'Jammu & Kashmir' and 'Ladakh' rows into a single row ---------
id1 = _row_label(df, 'Jammu & Kashmir')
id2 = _row_label(df, 'Ladakh')
temp = df.loc[[id1, id2]]

# Column-wise sum of the two rows, turned back into a one-row frame whose
# index label is 0.
summed = temp.sum(axis=0).to_frame().transpose()
temp = (
    pd.concat([temp, summed])
    .drop(labels=[id1, id2], axis=0)  # keep only the summed row
    .rename(index={0: id1})           # re-label it with the first row's index
)

# The sum runs over every column, so the descriptive columns are overwritten
# with explicit values.
temp.loc[id1, 'Si. No.'] = str(id1)
temp.loc[id1, 'Category'] = 'Union Territories'
temp.loc[id1, 'State/UT/City'] = 'Jammu & Kashmir & Ladakh'

# Swap the two original rows for the merged one.
df = pd.concat([df.drop(labels=[id1, id2]), temp])

# --- Duplicate the 'D & N Haveli and Daman & Diu' row ------------------------
id1 = _row_label(df, 'D & N Haveli and Daman & Diu')
temp = df.loc[id1].to_frame().transpose()
temp['State/UT/City'] = 'D & N Haveli and Daman & Diu-2'  # distinct name for the copy
df = pd.concat([df, temp])

# Order the rows alphabetically by state name and preview the result.
df = df.sort_values('State/UT/City', axis=0)
df.head()
| Si. No. | Category | State/UT/City | Dangerous or Careless Driving/ Over-taking/etc. - Cases | Dangerous or Careless Driving/ Over-taking/etc. - Injured | Dangerous or Careless Driving/ Over-taking/etc. - Died | Over Speeding - Cases | Over Speeding - Injured | Over Speeding - Died | Driving under Influence of Drug/Alcohol - Cases | ... | Other Causes - Died | Total Road Accidents - Cases | Total Road Accidents - Injured | Total Road Accidents - Died | Unmanned Railway Crossing Accidents - Cases | Unmanned Railway Crossing Accidents - Injured | Unmanned Railway Crossing Accidents - Died | Grand Total - Cases | Grand Total - Injured | Grand Total - Died | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 29 | 29 | Union Territories | A & N Islands | 82 | 90 | 8 | 26 | 31 | 2 | 33 | ... | 0 | 141 | 145 | 14 | 0 | 0 | 0 | 141 | 145 | 14 |
| 0 | 1 | State | Andhra Pradesh | 3300 | 3630 | 1221 | 12344 | 13774 | 4778 | 154 | ... | 668 | 17924 | 19675 | 7039 | 0 | 0 | 0 | 17924 | 19675 | 7039 |
| 1 | 2 | State | Arunachal Pradesh | 25 | 15 | 15 | 39 | 32 | 23 | 0 | ... | 19 | 159 | 143 | 90 | 0 | 0 | 0 | 159 | 143 | 90 |
| 2 | 3 | State | Assam | 1169 | 893 | 462 | 3293 | 2530 | 1377 | 491 | ... | 33 | 6737 | 5065 | 2813 | 1 | 0 | 1 | 6738 | 5065 | 2814 |
| 3 | 4 | State | Bihar | 2564 | 1936 | 1881 | 3674 | 3072 | 2785 | 152 | ... | 150 | 8639 | 7019 | 6698 | 0 | 0 | 0 | 8639 | 7019 | 6698 |
5 rows × 51 columns
# Cast every column from the fourth one onwards to int64.
# NOTE(review): presumably needed because the earlier concat/transpose steps
# leave these columns with a non-integer dtype -- confirm.
df_cols = list(df.columns)
for col in df_cols[3:]:
    df[col] = df[col].astype('int64')

# Sort the GeoJSON names, with 'NCT of Delhi' listed as 'Delhi' while sorting.
# NOTE(review): presumably so that it lands at the same sorted position as the
# dataframe's 'Delhi (UT)' row -- confirm.
names.remove('NCT of Delhi')
names.append('Delhi')
names.sort()

# Create a dictionary to map each state name of the dataframe to the GeoJSON
# name at the same position.
# NOTE(review): the pairing is purely positional -- it assumes the dataframe
# rows and the sorted `names` list line up one-to-one; re-check after any
# change to either data source.
df_name = list(df['State/UT/City'])
df2geo = {state: names[i] for i, state in enumerate(df_name)}
df2geo['Delhi (UT)'] = 'NCT of Delhi'  # undo the temporary 'Delhi' rename

# New column holding the GeoJSON id that corresponds to each row.
df['name2geo'] = df['State/UT/City'].map(df2geo)
# Draw a choropleth of 'Grand Total - Died', matching each dataframe row to a
# GeoJSON feature through the 'name2geo' column.
# NOTE(review): recent plotly releases reportedly deprecate choropleth_mapbox
# in favour of choropleth_map -- check the installed version before migrating.
map_center = {"lat": 20.5937, "lon": 78.9629}  # initial centre of the map view
no_margin = {"r": 0, "t": 0, "l": 0, "b": 0}   # zero margin on all four sides

fig = px.choropleth_mapbox(
    df,
    geojson=counties,
    locations='name2geo',
    color='Grand Total - Died',
    color_continuous_scale="Viridis",
    mapbox_style="carto-positron",
    zoom=3,
    center=map_center,
    opacity=0.5,
)
fig.update_layout(margin=no_margin)
fig.show()